Regression: Mediation and Moderation

Erin M. Buchanan

Last Updated: 2021-02-06

What is Mediation?

Baron & Kenny (1986)

Baron & Kenny (1986)

Baron & Kenny (1986): Limitations

Sobel Test

Bootstrapping

Mediation: Data Screening

Mediation: Example

library(rio)
master <- import("data/mediation.sav")
str(master)
## 'data.frame':    240 obs. of  3 variables:
##  $ previous: num  3 2 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##   ..- attr(*, "display_width")= int 14
##  $ facebook: num  3.67 5 4 4.5 4.5 ...
##   ..- attr(*, "format.spss")= chr "F8.2"
##   ..- attr(*, "display_width")= int 14
##  $ exam    : num  1 1 2 7 6 1 2 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F8.2"
##   ..- attr(*, "display_width")= int 10
summary(master)
##     previous        facebook          exam       
##  Min.   :1.000   Min.   :1.750   Min.   : 1.000  
##  1st Qu.:1.000   1st Qu.:3.750   1st Qu.: 1.000  
##  Median :1.000   Median :4.250   Median : 1.000  
##  Mean   :1.571   Mean   :4.143   Mean   : 1.825  
##  3rd Qu.:2.000   3rd Qu.:4.750   3rd Qu.: 2.000  
##  Max.   :7.000   Max.   :5.000   Max.   :11.000  
##                  NA's   :1
master <- na.omit(master)

Mediation: c Path

model1 <- lm(exam ~ previous, data = master)
summary(model1)
## 
## Call:
## lm(formula = exam ~ previous, data = master)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9092 -0.6477 -0.6477  0.3523  8.4062 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.33228    0.16749   7.955 7.37e-14 ***
## previous     0.31539    0.08689   3.630 0.000348 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.496 on 237 degrees of freedom
## Multiple R-squared:  0.05266,    Adjusted R-squared:  0.04867 
## F-statistic: 13.17 on 1 and 237 DF,  p-value: 0.0003476

Mediation: a Path

model2 <- lm(facebook ~ previous, data = master)
summary(model2)
## 
## Call:
## lm(formula = facebook ~ previous, data = master)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.44609 -0.44609  0.05391  0.55391  1.35640 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.28817    0.08195  52.330   <2e-16 ***
## previous    -0.09208    0.04251  -2.166   0.0313 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.732 on 237 degrees of freedom
## Multiple R-squared:  0.01941,    Adjusted R-squared:  0.01527 
## F-statistic: 4.692 on 1 and 237 DF,  p-value: 0.03131

Mediation: b, c’ Path

model3 <- lm(exam ~ previous + facebook, data = master)
summary(model3)
## 
## Call:
## lm(formula = exam ~ previous + facebook, data = master)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6065 -0.7666 -0.3117  0.3850  7.9999 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.93294    0.56791   6.925 4.09e-11 ***
## previous     0.25954    0.08397   3.091  0.00224 ** 
## facebook    -0.60647    0.12705  -4.773 3.18e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.432 on 236 degrees of freedom
## Multiple R-squared:  0.1361, Adjusted R-squared:  0.1288 
## F-statistic: 18.59 on 2 and 236 DF,  p-value: 3.194e-08

Mediation: Interpretation

Mediation: Sobel Test

#aroian sobel
a <- coef(model2)[2]
b <- coef(model3)[3]
SEa <- summary(model2)$coefficients[2,2]
SEb <- summary(model3)$coefficients[3,2]
zscore <- (a*b)/(sqrt((b^2*SEa^2)+(a^2*SEb^2)+(SEa^2*SEb^2)))
zscore
## previous 
## 1.937494
#two tailed test 
pnorm(abs(zscore), lower.tail = F)*2
##   previous 
## 0.05268497

Mediation: Sobel Test

Mediation: All Together + Bootstrapping

#devtools::install_github("doomlab/MeMoBootR")
library(MeMoBootR)
## Loading required package: boot
## Loading required package: diagram
## Loading required package: shape
## Loading required package: ggplot2
#no missing data allowed
med_results <- mediation1(y = "exam",
                          x = "previous", 
                          m = "facebook", 
                          df = master)

Mediation: MeMoBootR

head(med_results$datascreening$fulldata)
##   previous facebook exam badmahal badleverage badcooks totalout
## 1        3 3.666667    1        0           0        0        0
## 2        2 5.000000    1        0           0        0        0
## 3        1 4.000000    2        0           0        0        0
## 4        1 4.500000    7        0           0        1        1
## 5        1 4.500000    6        0           0        1        1
## 6        1 4.500000    1        0           0        0        0
med_results$datascreening$correl
##             (Intercept)   previous   facebook
## (Intercept)   1.0000000 -0.3617623 -0.9593469
## previous     -0.3617623  1.0000000  0.1393247
## facebook     -0.9593469  0.1393247  1.0000000
med_results$datascreening$linearity

med_results$datascreening$normality

med_results$datascreening$homogen

Mediation: MeMoBootR

summary(med_results$model1)
## 
## Call:
## lm(formula = allformulas$eq1, data = finaldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9092 -0.6477 -0.6477  0.3523  8.4062 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.33228    0.16749   7.955 7.37e-14 ***
## previous     0.31539    0.08689   3.630 0.000348 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.496 on 237 degrees of freedom
## Multiple R-squared:  0.05266,    Adjusted R-squared:  0.04867 
## F-statistic: 13.17 on 1 and 237 DF,  p-value: 0.0003476
summary(med_results$model2)
## 
## Call:
## lm(formula = allformulas$eq2, data = finaldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.44609 -0.44609  0.05391  0.55391  1.35640 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.28817    0.08195  52.330   <2e-16 ***
## previous    -0.09208    0.04251  -2.166   0.0313 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.732 on 237 degrees of freedom
## Multiple R-squared:  0.01941,    Adjusted R-squared:  0.01527 
## F-statistic: 4.692 on 1 and 237 DF,  p-value: 0.03131
summary(med_results$model3)
## 
## Call:
## lm(formula = allformulas$eq3, data = finaldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6065 -0.7666 -0.3117  0.3850  7.9999 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.93294    0.56791   6.925 4.09e-11 ***
## previous     0.25954    0.08397   3.091  0.00224 ** 
## facebook    -0.60647    0.12705  -4.773 3.18e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.432 on 236 degrees of freedom
## Multiple R-squared:  0.1361, Adjusted R-squared:  0.1288 
## F-statistic: 18.59 on 2 and 236 DF,  p-value: 3.194e-08

Mediation: MeMoBootR

med_results$indirect.effect
##   previous 
## 0.05584541
med_results$z.score
## previous 
## 1.937494
med_results$p.value
##   previous 
## 0.05268497

Mediation: MeMoBootR

med_results$boot.results
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = finaldata, statistic = indirectmed, R = nboot, formula2 = allformulas$eq2, 
##     formula3 = allformulas$eq3, x = x, med.var = m)
## 
## 
## Bootstrap Statistics :
##       original     bias    std. error
## t1* 0.05584541 0.00102295  0.03210787
med_results$boot.ci
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = bootresults, conf = conf_level, type = "norm")
## 
## Intervals : 
## Level      Normal        
## 95%   (-0.0081,  0.1178 )  
## Calculations and Intervals on Original Scale
med_results$diagram

Moderation

Conceptual Moderation Model

Conceptual Moderation Model

Conceptual Moderation Model

Statistical Moderation Model

Centering variables

Centering variables

Moderation: Simple Slopes

Moderation: Simple Slopes

Moderation: Simple Slopes

master <- import("data/moderation.sav")
str(master)
## 'data.frame':    442 obs. of  4 variables:
##  $ ID        : num  69 55 7 96 130 124 72 139 102 179 ...
##   ..- attr(*, "label")= chr "Case ID"
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ Aggression: num  13 38 30 23 25 46 41 22 35 23 ...
##   ..- attr(*, "label")= chr "Agression"
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ Vid_Games : num  16 12 32 10 11 29 23 15 20 20 ...
##   ..- attr(*, "label")= chr "Video Games (Hours per week)"
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ CaUnTs    : num  0 0 0 1 1 1 2 3 3 3 ...
##   ..- attr(*, "label")= chr "Callous Unemotional Traits"
##   ..- attr(*, "format.spss")= chr "F2.0"

Moderation: Centering

#center the X and M variable (NOT THE DV)
#when you create these use the final dataset 
master$zvid = scale(master$Vid_Games, scale = F) #mean center, not z score
master$zcal = scale(master$CaUnTs, scale = F)

Moderation: Running

#run the model to see if the moderation is significant
#use the z score variables!
#be sure to do X*M so the graph is right 
modmodel <- lm(Aggression ~ zvid*zcal, data = master)

Moderation: Interpretation

summary(modmodel)
## 
## Call:
## lm(formula = Aggression ~ zvid * zcal, data = master)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.7144  -6.9087  -0.1923   6.9036  29.2290 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.967108   0.475057  84.131  < 2e-16 ***
## zvid         0.169625   0.068473   2.477 0.013616 *  
## zcal         0.760093   0.049458  15.368  < 2e-16 ***
## zvid:zcal    0.027062   0.006981   3.877 0.000122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.976 on 438 degrees of freedom
## Multiple R-squared:  0.3773, Adjusted R-squared:  0.373 
## F-statistic: 88.46 on 3 and 438 DF,  p-value: < 2.2e-16

Moderation: Simple Slopes

Moderation: Simple Slopes

#create the low and high z score variables 
master$zcallow <- master$zcal + sd(master$zcal) #bring them up
master$zcalhigh <- master$zcal - sd(master$zcal) #bring them down
summary(master)
##        ID          Aggression      Vid_Games         CaUnTs    
##  Min.   :  1.0   Min.   : 9.00   Min.   : 1.00   Min.   : 0.0  
##  1st Qu.:111.2   1st Qu.:32.00   1st Qu.:17.00   1st Qu.:11.0  
##  Median :221.5   Median :40.00   Median :22.00   Median :18.0  
##  Mean   :221.5   Mean   :40.05   Mean   :21.84   Mean   :18.6  
##  3rd Qu.:331.8   3rd Qu.:48.00   3rd Qu.:26.00   3rd Qu.:26.0  
##  Max.   :442.0   Max.   :82.00   Max.   :38.00   Max.   :43.0  
##        zvid.V1              zcal.V1            zcallow.V1    
##  Min.   :-20.843891   Min.   :-18.595023   Min.   :-8.97733  
##  1st Qu.: -4.843891   1st Qu.: -7.595023   1st Qu.: 2.02267  
##  Median :  0.156109   Median : -0.595023   Median : 9.02267  
##  Mean   :  0.000000   Mean   :  0.000000   Mean   : 9.61769  
##  3rd Qu.:  4.156109   3rd Qu.:  7.404977   3rd Qu.:17.02267  
##  Max.   : 16.156109   Max.   : 24.404977   Max.   :34.02267  
##      zcalhigh.V1     
##  Min.   :-28.212716  
##  1st Qu.:-17.212716  
##  Median :-10.212716  
##  Mean   : -9.617693  
##  3rd Qu.: -2.212716  
##  Max.   : 14.787284

Moderation: Simple Slopes

#run the models
#be sure to X*M
modmodellow <- lm(Aggression ~ zvid*zcallow, data = master)
modmodelhigh <- lm(Aggression ~ zvid*zcalhigh, data = master)

Moderation: Simple Slopes

summary(modmodellow) #low slope
## 
## Call:
## lm(formula = Aggression ~ zvid * zcallow, data = master)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.7144  -6.9087  -0.1923   6.9036  29.2290 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  32.656771   0.672018  48.595  < 2e-16 ***
## zvid         -0.090651   0.099105  -0.915 0.360854    
## zcallow       0.760093   0.049458  15.368  < 2e-16 ***
## zvid:zcallow  0.027062   0.006981   3.877 0.000122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.976 on 438 degrees of freedom
## Multiple R-squared:  0.3773, Adjusted R-squared:  0.373 
## F-statistic: 88.46 on 3 and 438 DF,  p-value: < 2.2e-16

Moderation: Simple Slopes

summary(modmodel) #average slope
## 
## Call:
## lm(formula = Aggression ~ zvid * zcal, data = master)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.7144  -6.9087  -0.1923   6.9036  29.2290 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.967108   0.475057  84.131  < 2e-16 ***
## zvid         0.169625   0.068473   2.477 0.013616 *  
## zcal         0.760093   0.049458  15.368  < 2e-16 ***
## zvid:zcal    0.027062   0.006981   3.877 0.000122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.976 on 438 degrees of freedom
## Multiple R-squared:  0.3773, Adjusted R-squared:  0.373 
## F-statistic: 88.46 on 3 and 438 DF,  p-value: < 2.2e-16

Moderation: Simple Slopes

summary(modmodelhigh) #high slope
## 
## Call:
## lm(formula = Aggression ~ zvid * zcalhigh, data = master)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.7144  -6.9087  -0.1923   6.9036  29.2290 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   47.277446   0.672520  70.299  < 2e-16 ***
## zvid           0.429901   0.092577   4.644 4.53e-06 ***
## zcalhigh       0.760093   0.049458  15.368  < 2e-16 ***
## zvid:zcalhigh  0.027062   0.006981   3.877 0.000122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.976 on 438 degrees of freedom
## Multiple R-squared:  0.3773, Adjusted R-squared:  0.373 
## F-statistic: 88.46 on 3 and 438 DF,  p-value: < 2.2e-16

Moderation: Simple Slopes

Moderation: Graphing

library(ggplot2)
cleanup <- theme(panel.grid.major = element_blank(), 
                panel.grid.minor = element_blank(), 
                panel.background = element_blank(), 
                axis.line.x = element_line(color = "black"),
                axis.line.y = element_line(color = "black"),
                legend.key = element_rect(fill = "white"),
                text = element_text(size = 15))

modgraph <- ggplot(master, aes(zvid, Aggression))

##change Cal to the new moderator label
##change xlab for the new X label
modgraph + 
  xlab("Centered Video Games") + 
  geom_point(color = "gray") +
  geom_abline(aes(intercept = modmodellow$coefficients[1],
                  slope = modmodellow$coefficients[2], 
                  linetype = "-1SD Cal"), size = 1) +
  geom_abline(aes(intercept = modmodel$coefficients[1],
                  slope = modmodel$coefficients[2], 
                  linetype = "Average Cal"), size = 1) +
  geom_abline(aes(intercept = modmodelhigh$coefficients[1],
                  slope = modmodelhigh$coefficients[2], 
                  linetype = "+1SD Cal"), size = 1) +
  scale_linetype_manual(values = c("dotted", "dashed", "solid"),
                        breaks = c("-1SD Cal", "Average Cal", "+1SD Cal"),
                        name = "Simple Slope") +
  cleanup 

Moderation: MeMoBootR

mod_model <- moderation1(y = "Aggression",
                         x = "Vid_Games",
                         m = "CaUnTs",
                         df = master)

Moderation: MeMoBootR

#data screening
head(mod_model$datascreening$fulldata)
##    ID Aggression Vid_Games CaUnTs       zvid      zcal  zcallow  zcalhigh
## 1  69         13        16      0  -5.843891 -18.59502 -8.97733 -28.21272
## 2  55         38        12      0  -9.843891 -18.59502 -8.97733 -28.21272
## 3   7         30        32      0  10.156109 -18.59502 -8.97733 -28.21272
## 4  96         23        10      1 -11.843891 -17.59502 -7.97733 -27.21272
## 5 130         25        11      1 -10.843891 -17.59502 -7.97733 -27.21272
## 6 124         46        29      1   7.156109 -17.59502 -7.97733 -27.21272
##   badmahal badleverage badcooks totalout
## 1        0           0        1        1
## 2        0           1        0        1
## 3        0           1        0        1
## 4        0           1        0        1
## 5        0           1        0        1
## 6        0           1        1        2
#mod_model$datascreening$correl
#mod_model$datascreening$linearity
#mod_model$datascreening$normality
#mod_model$datascreening$homogen

#models
summary(mod_model$model1)
## 
## Call:
## lm(formula = allformulas$eq1, data = finaldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.7144  -6.9087  -0.1923   6.9036  29.2290 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      39.967108   0.475057  84.131  < 2e-16 ***
## Vid_Games         0.169625   0.068473   2.477 0.013616 *  
## CaUnTs            0.760093   0.049458  15.368  < 2e-16 ***
## Vid_Games:CaUnTs  0.027062   0.006981   3.877 0.000122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.976 on 438 degrees of freedom
## Multiple R-squared:  0.3773, Adjusted R-squared:  0.373 
## F-statistic: 88.46 on 3 and 438 DF,  p-value: < 2.2e-16
#summary(mod_model$model1low)
#summary(mod_model$model1high)
mod_model$interpretation
## [1] "At low levels of CaUnTs, you see that every unit increase in Vid_Games predicts -0.09 unit change in Aggression. \n\nAt average levels of CaUnTs, you see that every unit increase in Vid_Games predicts 0.17 unit change in Aggression. \n\nAt high levels of CaUnTs, you see that every unit increase in Vid_Games predicts 0.43 unit change in Aggression."
#graphs
mod_model$graphslopes

Summary